home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
MACD 5
/
MACD 5.bin
/
workbench
/
tools
/
czesc_2
/
makewords
/
source
/
unjumble.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-08-12
|
10KB
|
366 lines
/* unjumble.c - try to make words from a jumbled string of letters.
Ron Charlton
9002 Balcor Circle
Knoxville, TN 37923
Phone: (615)694-0800
PLINK: R*CHARLTON
BINTNET: charltr@utkvx1
05-Jul-90
This program is in the public domain. It may be used for any purpose.
Algorithm: Generate all of the permutations of the letters in a string
and test each one for its "trigram weight". (A trigram is a group of three
letters, e.g., AAA, EWE, QQQ.) Use a table of trigram weights (frequencies)
derived from a 38,500 word dictionary. Calculate an n-letter string's
trigram weight as the sum of the weights of its (n - 2) trigrams. A
trigram's weight is the log base 2 of its frequency of occurrence in
the dictionary.
Keep a table of the twenty highest weights while avoiding duplicates due to
multiple occurrences of a single letter. Print out only the non-zero
entries in the table. This all works because some trigrams (such as "ING")
are more likely than others (such as "QQQ").
The trigram data are stored on disk as a nybble_table and a bit_table for
compactness, then converted to an array (nn[][][]) for execution speed.
The string to be unjumbled is adjusted by making 'A' 1, 'B' 2, ... 'Z' 26
so that the characters can be used as indices into nn[][][].
This allows nn[][][] to be as small as possible (leaving '\0' available as
the end-of-string indicator).
history:
v1.3 - don't include any 'word' with any individual trigram weight of 0
v1.4 - general cleanup (& change to strupr)
v1.5 - added exit() from main() & editorial comments & misc. tweakings
v1.6 - changed to structure for table of words and weights
v1.7 - changed do{}while() to for() in print_table(); READ_NYBBLE(); test_bit
v1.8 - TEST_BIT() as macro; removed unused variables in main()
v1.9 - rearranged main(), changed function names, modified intro(), strlwr()
v1.10 removed strlwr() definition
v1.11 changed temp variable to static in insert()
v1.12 eliminated second strcpy() in permute() (faster!!)
v1.13 allow 8-letter words, add 'nothing found'
v1.14 adjust MAXLEN, restored strlwr() definition, conbined printf's in intro()
*/
char version[] = "v1.14";
#include <stdio.h>
#include <ctype.h>
#define CSI 0x9b
#define CLS 0x0c
#define FALSE 0
#define TRUE (!FALSE)
#define MAPVALUE ('a'-1) /* 'A' for uppercase, 'a' for lowercase */
#define MAXLEN 10 /* max. length of string to unjumble */
#define TABLESIZE 20 /* size of table of guesses */
/* read a nybble from a table of nybbles (32-bit longs in table) */
#define READ_NYBBLE( nyb_number ) \
( (nybble_table [ nyb_number >> 3 ] >> ((nyb_number & 7L) << 2)) & 0xFL )
/* test bit n in table of 32-bit ints */
#define TEST_BIT( n ) \
( bit_table [ n >> 5 ] & ( 1L << (n & 0x1FL) ) ? 1 : 0 )
/* the follow two lines are associated with tables.c */
extern int bit_table_size, nybble_table_size;
extern unsigned long bit_table[], nybble_table[];
/* structure array to hold candidate words and their weights */
struct {
char word [ MAXLEN + 1 ];
int weight;
} table [ TABLESIZE ];
unsigned char nn[27][27][27]; /* array of probabilities of trigrams */
int min_weight; /* used in insert() for minimum weight */
int min_weight_ptr; /* used in insert() to point at minimum */
char *strlwr();
/*------------------------------------------------------------------------*/
main()
{
char buf [ 255+1 ];
int length;
intro();
get_weights(); /* read weights into nn[][][] */
for (;;)
{
init_table();
printf ( "\tEnter 3 to 8 letters ( <RETURN> to quit ): ");
if ( ! gets ( buf ) )
break;
length = strlen ( buf );
if ( length == 0 )
break;
else if ( !all_letters ( buf ) )
printf ( "\tYou may only use letters (a-z; A-Z).\n\n" );
else if ( length > 8 )
printf ( "\tToo many letters.\n\n" );
else if ( length < 3 )
printf ( "\tToo few letters.\n\n" );
else
{
/* do the unjumbling */
strlwr ( buf );
map ( buf ); /* convert to array indices */
permute ( buf, 0, length ); /* do all permutations */
print_table(); /* display the results */
}
}
exit ( 0 );
}
/*------------------------------------------------------------------------*/
intro()
{
#ifdef MCH_AMIGA
printf ( "%c", CLS ); /* clear screen */
printf ( "%c0;33;40m", CSI ); /* color 3,0 */
#endif
printf ( "\n\t\t\t Unjumble %s\n\n"
"\t\t\t by\n\n"
"\t\t\t Ron Charlton\n"
"\t\t\t 9002 Balcor Circle\n"
"\t\t\t Knoxville, TN 37923\n"
"\t\t\t Plink: R*CHARLTON\n"
"\t\t\t BITNET: charltr@utkvx1\n"
"\t\t Internet: charltr@utkvx1.utk.edu\n\n", version );
#ifdef MCH_AMIGA
printf ( "%c0;31;40m", CSI ); /* color 1,0 */
#endif
printf ( "\tUnjumble tries to create words out of a group of letters\n"
"\tthat you enter. It will print out up to twenty guesses in\n"
"\torder with each guess' weight before it. Three to 8 letters at\n"
"\ta time are accepted. Eight letters requires several seconds of \n"
"\tcalculation. More letters are harder to unjumble.\n\n" );
}
/*------------------------------------------------------------------------*/
/* test for all letters in a string */
all_letters ( str )
char *str;
{
while ( isalpha ( *str++ ) )
;
return ( ! *--str ); /* pointing at '\0' if all alpha */
}
/*------------------------------------------------------------------------*/
/* convert letters to array indices (A-->1, B-->2 ... Z-->26) */
map ( str )
char *str;
{
while ( * str )
* str++ -= MAPVALUE;
}
/*------------------------------------------------------------------------*/
/* find sum of weights of trigrams in a string. */
weigh ( str )
char *str;
{
register int weight = 0, len, pos, triweight;
if ( (len = strlen ( str ) ) >= 3 )
for ( pos = 0; pos <= len - 3; pos++)
{
triweight = nn [ str [pos] ] [ str [pos+1] ] [ str [pos+2] ];
if ( triweight > 0 )
weight += triweight;
else
{
/* eliminate those with any trigram of 0 weight */
weight = 0;
break;
}
}
return ( weight );
}
/*------------------------------------------------------------------------*/
/* find all permutations of a string */
permute ( a, k, n )
char *a;
int k, n;
{
int i;
char b [ MAXLEN ];
static char temp;
if (k == n)
insert ( a ); /* insert into table if weighty */
else
{
strcpy(b, a);
for (i = k; i < n; i++)
{
temp = b [ k ];
b [ k ] = b [ i ];
b [ i ] = temp;
permute ( b, k+1, n );
}
}
}
/*------------------------------------------------------------------------*/
/* convert array indices to alphabetic 1-->A, 2-->B ... 26-->Z */
unmap ( str )
char *str;
{
while ( * str )
*str++ += MAPVALUE;
}
/*------------------------------------------------------------------------*/
/* initialize the guess table to empty */
init_table()
{
int i;
min_weight = 0;
min_weight_ptr = 0;
for ( i = 0; i < TABLESIZE; i++)
{
table[i].weight = 0;
table[i].word[0] = '\0';
}
}
/*------------------------------------------------------------------------*/
/* if this is a weighty word, insert it in the table */
insert ( str )
char *str;
{
register int weight, i;
weight = weigh ( str );
if ( weight > min_weight ) /* does this word weigh more than table min.? */
{
for ( i = 0; i < TABLESIZE; i++ ) /* avoid duplicates (BIGgER BIgGER) */
if ( ! strcmp ( table[i].word, str ) )
return; /* it's already in the table */
/* put this one in table in place of old minimum */
table [ min_weight_ptr ].weight = weight;
strcpy ( table [ min_weight_ptr ].word, str );
/* search for new minimum in the table */
min_weight_ptr = 0;
min_weight = table [ 0 ].weight;
for ( i = 1; i < TABLESIZE; i++ )
if ( table [ i ].weight < min_weight )
{
min_weight_ptr = i;
min_weight = table [ i ].weight;
}
}
}
/*------------------------------------------------------------------------*/
/* print the non-zero table entries in descending order. */
/* this is a bogus, but adequate, sort */
print_table()
{
int max_entry, i, first_pass = TRUE;
for(;;)
{
max_entry = 0; /* point at maximum weight */
for ( i = 1; i < TABLESIZE; i++ )
if ( table [ i ].weight > table [ max_entry ].weight )
max_entry = i;
if ( first_pass && table [ max_entry ].weight == 0 )
{
printf ( "\t\tNothing found.\n" );
break;
}
if ( table [ max_entry ].weight )
{
unmap ( table [ max_entry ].word );
printf("\t\t%5d %s\n", table [ max_entry ].weight,
table [ max_entry ].word );
table [ max_entry ].weight = 0; /* 'delete' from table */
}
else
break;
first_pass = FALSE;
}
}
/*------------------------------------------------------------------------*/
/* get trigram weights from nybble_table and bit_table into nn[][][] */
get_weights()
{
register int i, j, k, m = 0, ptr = 0;
for ( i = 1; i <= 26; i++ )
for ( j = 1; j <= 26; j++ )
for ( k = 1; k <= 26; k++ )
{
/* beware of ++x side effects */
if ( TEST_BIT ( m ) )
{
nn [ i ] [ j ] [ k ] = READ_NYBBLE ( ptr );
++ptr;
}
++m;
}
}
/*------------------------------------------------------------------------*/
/*
* Convert a string to lower case in place and return a pointer
* to the resulting string.
*/
char *
strlwr ( char *str )
{
register char *s = str;
while ( *s )
{
if (*s >= 'A' && *s <= 'Z')
*s = *s - 'A' + 'a';
++s;
}
return ( str );
}
/*-------------------------- END OF FILE ---------------------------------*/